clear all

*-请先执行 B6_RDD.do 文档中 “global path ...” 至 "cd $D" 那几行，设定文件路径

cd "$ex\Grade5"

shellout "econ4136seminar10_rd.pdf"  //数据背景和问题设定

use "grade5.dta", clear

// 1
reg avgmath classize, robust cluster(schlcode)
estimates store olsnox
reg avgmath classize enrollment, robust cluster(schlcode)
estimates store olscov1
reg avgmath classize disadv, robust cluster(schlcode)
estimates store olscov2
reg avgmath classize disadv enrollment, robust cluster(schlcode)
estimates store olscov3
su avgmath classize disadv
esttab olsnox olscov?, s(N r2) nogap b(%4.3f) t(%3.2f)

// 2
*-Start with Sub-sample: schools with enrollment between 20 and 60 students
keep if inrange(enrollment,20,60)
gen d = enrollment >= 40.5 & enrollment != .
gen enroll_recent = enrollment - 40.5
gen enroll_recent1 = enroll_recent * d
br enroll* d
reg avgmath d disadv enroll_recent*, robust cluster(schlcode)

// 3
dropvars x y0 y1 rdd
range x 40 41  11
lpoly avgmath enrollment if enrollment <= 40.5, gen(y0) at(x) deg(3) //nograph
lpoly avgmath enrollment if enrollment >= 40.5, gen(y1) at(x) deg(3) //nograph
gen rdd = y0 - y1
list x rdd if x != .
	/* You need to extrapolate up to the discontinuity. Where that is btw
	   40 and 41 is not obvious. The best choice is usually to extrapolate
	   to the midpoint, ie. 40.5. Note how this is important for your estimate.
	*/
bys enrollment: egen yr = mean(avgmath) // This makes the graph easier to read
bys enrollment: replace yr = . if _n == 1
br enrollment avgmath yr 

*-GRAPH
  dropvars x y0 y1
  range x 20 60 41
  lpoly avgmath enrollment if enrollment <= 40.5, gen(y0) at(x) nograph deg(1)
  lpoly avgmath enrollment if enrollment >= 40.5, gen(y1) at(x) nograph deg(1)
*------------------Graphing--------------------begin-------------------
two (scatter avgmath enrollment, mc(OD) mcol(red) msize(*0.6)) ///
	(scatter yr enrollment, mc(OT) mcol(blue) msize(*0.8)) ///
	(line y0 x if x <= 40.5, lcolor(black) lwidth(medthick)) ///
	(line y1 x if x >= 40.5, lcolor(black) lwidth(medthick)) ///
	, /// options
	legend(off) scheme(s1mono) name(sharprd, replace) ///
	ytit("Average math grade") xtit("Enrollment") ///
	xline(40.5, lpattern(shortdash) lcol(black)) 
*------------------Graphing--------------------over--------------------


// 4
ivregress 2sls avgmath (classize = d) disadv enroll_recent*, robust cluster(schlcode)

// 5
* From hereon out, I use the same functional form for the running var
* on both sides of the discontinuities, for simplicity 
*-Use full sample
use "grade5.dta", clear
gen predclassize = enrollment/(int((enrollment - 1)/40) + 1)
label var predclassize "Predicted class size"
 br schlcode enrollment classize predclassize
 list schlcode enrollment classize predclassize in 1/200, sepby(schlcode)
 
bys enrollment: egen classr = mean(classize)
bys enrollment: replace classr = . if _n == 1
label var classr "Average class size"

*------------------Graphing--------------------begin-------------------
two (scatter classr enrollment) ///
	(line predclassize enrollment, lcolor(black) lwidth(medthick)) ///
	, ytitle("Class size") ///
	legend(cols(1) pos(5) ring(0)) name(classize, replace) ///
	xline(40.5 80.5 120.5 160.5 200.5, lpattern(shortdash) lcol(black))
*------------------Graphing--------------------over--------------------


// 6
ivregress 2sls avgmath (classize = predclassize) enrollment, robust cluster(schlcode)
estimates store frdnox

// 7
ivregress 2sls avgmath (classize = predclassize) disadv enrollment, robust cluster(schlcode)
estimates store frdcov
esttab frdnox frdcov, se wide

// 8
twoway histogram enrollment, width(1) ///
  , xline(40.5 80.5 120.5 160.5 200.5, lpattern(shortdash) lcol(black)) ///
  name(hist, replace)

// 9
gen xenr20 = autocode(enrollment , 12, 0 , 240)
preserve
collapse avgm classr, by(xenr20)
label var avgmath "Average math score"
label var xenr20 "Enrollment"
label var classr "Average class size"
twoway (conn classr xenr) (conn avgm xenr, yaxis(2)) ///
	, name(avgs, replace) legend(ring(0) pos(4) cols(1))
restore

// 10
reg classize enrollment
predict classlin
gen enrollsq = enrollment*enrollment
reg classize enrollment enrollsq
predict classq
preserve
collapse avgm classr classq classlin, by(xenr20)
label var classq "Quadratic prediction"
label var classlin "Linear prediction"
label var classr "Average class size"
label var avgmath "Average math score"
label var xenr20 "Enrollment"
twoway	(line classlin classq xenr20, ytitle("Class size")) ///
		(conn classr xenr20) ///
		(conn avgmath xenr20, yaxis(2)) ///
		, name(funcform, replace) legend(ring(0) pos(4) cols(1))
restore

// 11
ivreg avgmath (classize = predclassize) disadv enrollment enrollsq , robust cluster(schlcode)
estimates store frdsq
gen enrollcub = enrollsq * enrollment
ivreg avgmath (classize = predclassize) disadv enrollment enrollsq enrollcub, robust cluster(schlcode)
estimates store frdcub
esttab frdcov frdsq frdcub

// 12
ivreg disadv (classize = predclassize) enrollment, robust cluster(schlcode)
estimates store frdplac
esttab frdnox frdplac



*-原始数据和链接:
*use http://www.uio.no/studier/emner/sv/oekonomi/ECON4136/h14/seminars/data/grade5.dta
/*             数据概况
contains data on class size, average math and verbal test scores 
for 2,019 5th grade classes in 1,002 public schools in Israel, as well as 
enrollment data for these schools and percentage disadvantaged pupils.


数据来源: 以色列的 1002 所公立学校中，2019个五年级的班级
变量: 班级规模，平均数学, 语言测试成绩, 学校的招生数据, 贫困生的百分比

*/
